package com.smartandroid.sa.tag.parser;

import com.smartandroid.sa.tag.helper.Validate;
import com.smartandroid.sa.tag.nodes.Entities;

/**
 * Reads the input stream into tokens.
 * <p>
 * The tokeniser repeatedly asks the current {@link TokeniserState} to read
 * from the {@link CharacterReader} until a token has been emitted. Character
 * data emitted by the states is accumulated in an internal buffer and handed
 * out as a single {@link Token.Character} before any pending non-character
 * token.
 */
class Tokeniser {
	/** Character substituted for references that cannot be represented. */
	static final char replacementChar = '\uFFFD'; // replaces null character

	/** The html input being tokenised. */
	private final CharacterReader reader;
	/** Collector for errors found while tokenising. */
	private final ParseErrorList errors;

	/** Current tokenisation state; starts in {@link TokeniserState#Data}. */
	private TokeniserState state = TokeniserState.Data;
	/** The token that will be handed out by a following {@link #read()}. */
	private Token emitPending;
	/** Whether {@link #emitPending} holds a token that has not been read. */
	private boolean isEmitPending = false;
	/** Buffers emitted characters so that they are output as one token. */
	private final StringBuilder charBuffer = new StringBuilder();
	/** Buffers data looking for &lt;/script&gt;. */
	StringBuilder dataBuffer;

	/** Tag we are building up. */
	Token.Tag tagPending;
	/** Doctype we are building up. */
	Token.Doctype doctypePending;
	/** Comment we are building up. */
	Token.Comment commentPending;
	/** The last start tag emitted, used to test for an appropriate end tag. */
	private Token.StartTag lastStartTag;
	/** Cleared when a self closing start tag is emitted; see {@link #read()}. */
	private boolean selfClosingFlagAcknowledged = true;

	/**
	 * Creates a tokeniser over the supplied input.
	 * 
	 * @param reader
	 *            the input to tokenise
	 * @param errors
	 *            the list that tokenisation errors are added to
	 */
	Tokeniser(CharacterReader reader, ParseErrorList errors) {
		this.reader = reader;
		this.errors = errors;
	}

	/**
	 * Produces the next token.
	 * <p>
	 * If a self closing start tag was emitted and nobody called
	 * {@link #acknowledgeSelfClosingFlag()} since, an error is recorded first.
	 * The current state is then run until a token is pending. Buffered
	 * characters take priority: they are returned as one character token and
	 * the pending token is kept for the following call.
	 * 
	 * @return the buffered characters as a character token, or otherwise the
	 *         pending token
	 */
	Token read() {
		if (!selfClosingFlagAcknowledged) {
			error("Self closing flag not acknowledged");
			selfClosingFlagAcknowledged = true;
		}

		while (!isEmitPending) {
			state.read(this, reader);
		}

		// if emit is pending, a non-character token was found: return any chars
		// in buffer, and leave token for next read:
		if (charBuffer.length() > 0) {
			String str = charBuffer.toString();
			charBuffer.setLength(0);
			return new Token.Character(str);
		}
		isEmitPending = false;
		return emitPending;
	}

	/**
	 * Queues a token to be returned by {@link #read()}.
	 * <p>
	 * A start tag is remembered as the last start tag, and if it is self
	 * closing the acknowledgement flag is cleared. An end tag that carries
	 * attributes is recorded as an error.
	 * 
	 * @param token
	 *            the token to queue; validation fails if another token is
	 *            still waiting to be read
	 */
	void emit(Token token) {
		Validate.isFalse(isEmitPending, "There is an unread token pending!");

		emitPending = token;
		isEmitPending = true;

		if (token.type == Token.TokenType.StartTag) {
			Token.StartTag startTag = (Token.StartTag) token;
			lastStartTag = startTag;
			if (startTag.selfClosing) {
				selfClosingFlagAcknowledged = false;
			}
		} else if (token.type == Token.TokenType.EndTag) {
			Token.EndTag endTag = (Token.EndTag) token;
			if (endTag.attributes != null) {
				error("Attributes incorrectly present on end tag");
			}
		}
	}

	/**
	 * Appends a string to the character buffer.
	 * 
	 * @param str
	 *            the characters to buffer
	 */
	void emit(String str) {
		// buffer strings up until last string token found, to emit only one
		// token for a run of character refs etc.
		// does not set isEmitPending; read checks that
		charBuffer.append(str);
	}

	/**
	 * Appends characters to the character buffer.
	 * 
	 * @param chars
	 *            the characters to buffer
	 */
	void emit(char[] chars) {
		charBuffer.append(chars);
	}

	/**
	 * Appends a single character to the character buffer.
	 * 
	 * @param c
	 *            the character to buffer
	 */
	void emit(char c) {
		charBuffer.append(c);
	}

	/**
	 * @return the current tokenisation state
	 */
	TokeniserState getState() {
		return state;
	}

	/**
	 * Switches to another tokenisation state without touching the reader.
	 * 
	 * @param state
	 *            the state to switch to
	 */
	void transition(TokeniserState state) {
		this.state = state;
	}

	/**
	 * Advances the reader, then switches to another tokenisation state.
	 * 
	 * @param state
	 *            the state to switch to
	 */
	void advanceTransition(TokeniserState state) {
		reader.advance();
		this.state = state;
	}

	/**
	 * Marks the self closing flag of the last emitted start tag as handled, so
	 * that the next {@link #read()} does not record an error for it.
	 */
	void acknowledgeSelfClosingFlag() {
		selfClosingFlagAcknowledged = true;
	}

	/**
	 * Tries to consume a character reference at the reader's current position
	 * (the leading ampersand is expected to have been consumed already, as
	 * {@link #unescapeEntities(boolean)} does).
	 * 
	 * @param additionalAllowedCharacter
	 *            a character that, when it is the current character, means
	 *            there is no reference here; may be null
	 * @param inAttribute
	 *            whether the reference is inside an attribute value, which
	 *            makes named references followed by a letter, a digit or one
	 *            of '=', '-', '_' be rejected
	 * @return the character(s) the reference stands for, or null if nothing
	 *         was recognised as a reference
	 */
	char[] consumeCharacterReference(Character additionalAllowedCharacter,
			boolean inAttribute) {
		if (reader.isEmpty()) {
			return null;
		}
		if (additionalAllowedCharacter != null
				&& additionalAllowedCharacter == reader.current()) {
			return null;
		}
		if (reader.matchesAny('\t', '\n', '\r', '\f', ' ', '<', '&')) {
			return null;
		}

		// both helpers rewind to this mark when they give up on the reference
		reader.mark();
		if (reader.matchConsume("#")) {
			return consumeNumericReference();
		}
		return consumeNamedReference(inAttribute);
	}

	/**
	 * Consumes the remainder of a numeric reference; the '#' has already been
	 * consumed and the reader marked before it.
	 * 
	 * @return the referenced character(s), the replacement character when the
	 *         value is not a usable code point, or null (after rewinding) when
	 *         no numerals follow
	 */
	private char[] consumeNumericReference() {
		boolean isHexMode = reader.matchConsumeIgnoreCase("X");
		String numRef = isHexMode ? reader.consumeHexSequence() : reader
				.consumeDigitSequence();
		if (numRef.length() == 0) { // didn't match anything
			characterReferenceError("numeric reference with no numerals");
			reader.rewindToMark();
			return null;
		}
		if (!reader.matchConsume(";")) {
			characterReferenceError("missing semicolon"); // missing semi
		}

		int charval = -1;
		try {
			charval = Integer.parseInt(numRef, isHexMode ? 16 : 10);
		} catch (NumberFormatException ignored) {
			// Deliberately ignored: the numerals do not fit an int, so charval
			// stays at -1 and is reported as out of range below.
		}

		boolean isSurrogate = charval >= 0xD800 && charval <= 0xDFFF;
		if (charval == -1 || isSurrogate || charval > 0x10FFFF) {
			characterReferenceError("character outside of valid range");
			return new char[] { replacementChar };
		}
		// todo: implement number replacement table
		// todo: check for extra illegal unicode points as parse errors
		return Character.toChars(charval);
	}

	/**
	 * Consumes a named reference; the reader has been marked at its start.
	 * 
	 * @param inAttribute
	 *            whether the reference is inside an attribute value
	 * @return the referenced character, or null (after rewinding) when the
	 *         name is not accepted
	 */
	private char[] consumeNamedReference(boolean inAttribute) {
		// get as many letters as possible, and look for matching entities.
		String nameRef = reader.consumeLetterThenDigitSequence();
		boolean looksLegit = reader.matches(';');
		// found if a base named entity without a ;, or an extended entity
		// with the ;.
		boolean found = Entities.isBaseNamedEntity(nameRef)
				|| (Entities.isNamedEntity(nameRef) && looksLegit);

		if (!found) {
			reader.rewindToMark();
			if (looksLegit) { // named with semicolon
				characterReferenceError(String.format(
						"invalid named reference '%s'", nameRef));
			}
			return null;
		}
		if (inAttribute
				&& (reader.matchesLetter() || reader.matchesDigit() || reader
						.matchesAny('=', '-', '_'))) {
			// don't want that to match
			reader.rewindToMark();
			return null;
		}
		if (!reader.matchConsume(";")) {
			characterReferenceError("missing semicolon"); // missing semi
		}
		return new char[] { Entities.getCharacterByName(nameRef) };
	}

	/**
	 * Starts building a new tag, replacing any tag currently being built.
	 * 
	 * @param start
	 *            true to build a start tag, false to build an end tag
	 * @return the new pending tag
	 */
	Token.Tag createTagPending(boolean start) {
		tagPending = start ? new Token.StartTag() : new Token.EndTag();
		return tagPending;
	}

	/**
	 * Finalises the pending tag and emits it.
	 */
	void emitTagPending() {
		tagPending.finaliseTag();
		emit(tagPending);
	}

	/**
	 * Starts building a new comment token.
	 */
	void createCommentPending() {
		commentPending = new Token.Comment();
	}

	/**
	 * Emits the pending comment token.
	 */
	void emitCommentPending() {
		emit(commentPending);
	}

	/**
	 * Starts building a new doctype token.
	 */
	void createDoctypePending() {
		doctypePending = new Token.Doctype();
	}

	/**
	 * Emits the pending doctype token.
	 */
	void emitDoctypePending() {
		emit(doctypePending);
	}

	/**
	 * Replaces {@link #dataBuffer} with a new, empty buffer.
	 */
	void createTempBuffer() {
		dataBuffer = new StringBuilder();
	}

	/**
	 * Tests whether the pending tag has the same name as the last start tag
	 * that was emitted.
	 * 
	 * @return true if the names are equal; false if they differ or no start
	 *         tag has been emitted yet
	 */
	boolean isAppropriateEndTagToken() {
		return lastStartTag != null
				&& tagPending.tagName.equals(lastStartTag.tagName);
	}

	/**
	 * @return the name of the last start tag that was emitted, or null if no
	 *         start tag has been emitted yet
	 */
	String appropriateEndTagName() {
		// guard against input that reaches here before any start tag was seen
		if (lastStartTag == null) {
			return null;
		}
		return lastStartTag.tagName;
	}

	/**
	 * Records an unexpected-character error at the reader's position, if the
	 * error list accepts more errors.
	 * 
	 * @param state
	 *            the state in which the character was encountered
	 */
	void error(TokeniserState state) {
		if (errors.canAddError()) {
			errors.add(new ParseError(reader.pos(),
					"Unexpected character '%s' in input state [%s]", reader
							.current(), state));
		}
	}

	/**
	 * Records an unexpected end-of-file error at the reader's position, if the
	 * error list accepts more errors.
	 * 
	 * @param state
	 *            the state in which the end of input was reached
	 */
	void eofError(TokeniserState state) {
		if (errors.canAddError()) {
			errors.add(new ParseError(
					reader.pos(),
					"Unexpectedly reached end of file (EOF) in input state [%s]",
					state));
		}
	}

	/**
	 * Records an invalid character reference error at the reader's position,
	 * if the error list accepts more errors.
	 * 
	 * @param message
	 *            detail describing what is wrong with the reference
	 */
	private void characterReferenceError(String message) {
		if (errors.canAddError()) {
			errors.add(new ParseError(reader.pos(),
					"Invalid character reference: %s", message));
		}
	}

	/**
	 * Records a free-form error at the reader's position, if the error list
	 * accepts more errors.
	 * 
	 * @param errorMsg
	 *            the error message
	 */
	private void error(String errorMsg) {
		if (errors.canAddError()) {
			errors.add(new ParseError(reader.pos(), errorMsg));
		}
	}

	/**
	 * Placeholder for a namespace check on the current node.
	 * 
	 * @return always true, as namespaces are not implemented yet
	 */
	boolean currentNodeInHtmlNS() {
		// todo: implement namespaces correctly
		return true;
		// Element currentNode = currentNode();
		// return currentNode != null && currentNode.namespace().equals("HTML");
	}

	/**
	 * Utility method to consume reader and unescape entities found within.
	 * 
	 * @param inAttribute
	 *            whether the input is an attribute value; passed through to
	 *            {@link #consumeCharacterReference(Character, boolean)}
	 * @return unescaped string from reader
	 */
	String unescapeEntities(boolean inAttribute) {
		StringBuilder builder = new StringBuilder();
		while (!reader.isEmpty()) {
			builder.append(reader.consumeTo('&'));
			if (reader.matches('&')) {
				reader.consume();
				char[] c = consumeCharacterReference(null, inAttribute);
				if (c == null || c.length == 0) {
					// not a reference after all: keep the ampersand literally
					builder.append('&');
				} else {
					builder.append(c);
				}
			}
		}
		return builder.toString();
	}
}